library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(readr)
library(knitr)
library(stringr)

# Load the Manhattan restaurant inspection results from the working
# directory; read_csv() guesses each column's type from the file contents.
data <- read_csv(file = "Manhattan_Restaurant_Inspection_Results.csv")
## Rows: 94616 Columns: 27
## ── Column specification ──────────────────────────────────
## Delimiter: ","
## chr (17): DBA, BORO, BUILDING, STREET, CUISINE DESCRIPTION, INSPECTION DATE,...
## dbl  (9): CAMIS, ZIPCODE, PHONE, SCORE, Latitude, Longitude, Community Board...
## lgl  (1): Location Point1
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lookup table pairing each letter grade with its inspection score range.
# check.names = FALSE keeps the "Score Range" header exactly as written;
# with the default (check.names = TRUE) data.frame() rewrites it to the
# syntactic name `Score.Range`.
grade_score_table <- data.frame(
  Grade = c("A", "B", "C"),
  `Score Range` = c("< 14", "14 - 27", "28 or more"),
  check.names = FALSE
)
# Render the grade lookup table; the column headers are supplied explicitly
# so the printed table does not depend on the data frame's own column names.
grade_score_table %>%
  kable(
    caption = "NYC Restaurant Inspection Grade Score Ranges",
    col.names = c("Grade", "Score Range")
  )
NYC Restaurant Inspection Grade Score Ranges
Grade Score Range
A < 14
B 14 - 27
C 28 or more
# Keep only the cuisine label and the inspection score. SCORE is coerced to
# numeric first, then every row with a missing cuisine or a missing score
# (including values that became NA during the coercion) is dropped.
filtered_data <- data %>%
  select(`CUISINE DESCRIPTION`, SCORE) %>%
  mutate(SCORE = as.numeric(SCORE)) %>%
  filter(
    !is.na(`CUISINE DESCRIPTION`),
    !is.na(SCORE)
  )

# Mean inspection score per cuisine, ranked from the highest average to the
# lowest; only the first 35 cuisines of that ranking are kept.
top_cuisines <- filtered_data %>%
  group_by(`CUISINE DESCRIPTION`) %>%
  summarise(avg_score = mean(SCORE, na.rm = TRUE)) %>%
  arrange(desc(avg_score)) %>%
  slice_head(n = 35)

# Restrict the per-inspection scores to the cuisines listed in
# `top_cuisines`, then convert the cuisine label to a factor whose levels
# follow the row order of `top_cuisines` (highest average score first).
top_cuisine_data <- filtered_data %>%
  filter(`CUISINE DESCRIPTION` %in% top_cuisines[["CUISINE DESCRIPTION"]]) %>%
  mutate(
    `CUISINE DESCRIPTION` = factor(
      `CUISINE DESCRIPTION`,
      levels = top_cuisines[["CUISINE DESCRIPTION"]]
    )
  )

# Bar chart of the average inspection score per cuisine. The x axis is
# reordered by descending average score, and the tick labels are rotated so
# the cuisine names stay readable.
ggplot(
  data = top_cuisines,
  mapping = aes(
    x = reorder(`CUISINE DESCRIPTION`, -avg_score),
    y = avg_score
  )
) +
  geom_col(fill = "orange", color = "red") +
  labs(
    title = "Top 35 Cuisines Types by Average Inspection Scores",
    x = "Cuisine Description",
    y = "Average Score"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 8),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Box plot of the individual inspection scores for each cuisine in
# `top_cuisine_data`; the x axis follows the factor levels of the cuisine
# column, and the tick labels are rotated for readability.
ggplot(
  data = top_cuisine_data,
  mapping = aes(x = `CUISINE DESCRIPTION`, y = SCORE)
) +
  geom_boxplot(fill = "orange", color = "red") +
  labs(
    title = "Inspection Score Distribution for Top 35 Cuisines",
    x = "Cuisine Description",
    y = "Inspection Score"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 8),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Point data for the map: one row per inspection record that has a score and
# both coordinates. `info` is the hover label, with the restaurant name,
# cuisine and score joined by HTML line breaks.
geo_coord <- data %>%
  filter(
    !is.na(SCORE),
    !is.na(Latitude),
    !is.na(Longitude)
  ) %>%
  mutate(
    SCORE = as.numeric(SCORE),
    # paste0() is used because each label already ends in a space; paste()
    # would insert its default " " separator as well and produce
    # "Cuisine:  X" / "Score:  N" with two spaces.
    info = str_c(
      DBA,
      paste0("Cuisine: ", `CUISINE DESCRIPTION`),
      paste0("Score: ", SCORE),
      sep = "<br />"
    )
  ) %>%
  select(Longitude, Latitude, SCORE, info)

# Density map of inspection scores built as a single pipeline: plot_ly()
# creates the "densitymapbox" trace from `geo_coord`, and layout() adds the
# title, the basemap settings and the margins.
map_density <- geo_coord %>%
  plot_ly(
    type = "densitymapbox",
    lat = ~Latitude,
    lon = ~Longitude,
    z = ~SCORE,
    hovertext = ~info,
    radius = 5,
    colorscale = "Viridis",
    zmin = 0,
    zmax = 40
  ) %>%
  layout(
    title = "Density Plot of Inspection Scores of Restaurant in Manhattan",
    margin = list(r = 0, t = 30, b = 0, l = 0),
    mapbox = list(
      style = "carto-positron",
      zoom = 13,
      center = list(lon = -73.9712, lat = 40.7831)
    )
  )

# Print the widget so it is rendered in the output.
map_density

Go Home